# Replication Code McNamee and Zhang, International Organization
#Please replicate using R version 3.3.2. Checkpoint will not work without 3.3.2. This can be installed from https://cran.r-project.org/bin/macosx/old/ 

install.packages("checkpoint")
library(checkpoint)
#Set working directory to replication folder
setwd(".../ReplicationMaterials_update")
checkpoint("2017-12-01")

library(devtools)
library(car)
library(foreign)
library(plm)
library(sandwich)
library(lmtest)
library(psych)
library(stargazer)
library(Rcpp) 
library(ggplot2)  
library(GGally)
library(foreach)  
library(doParallel) 
library(abind) 
library(gsynth)
library(dotwhisker)
library(broom)
library(plyr)
library(psych)
library(gsynth)




#Getting rid of exponential values
options(scipen = 999)

#Loading datasets

load("mergedhuibian.RData")
load("mergedhuibian2.RData")
load("mergedxinjiang2.RData")
load("Russia.RData")
load("ethnicmerged.RData")
load("migr_trim.RData")

mergedhuibian3 <- subset(mergedhuibian2, mergedhuibian2$reform == 1)
mergedhuibian_withoutxinjiang <- subset(mergedhuibian2, mergedhuibian2$name_en != "Xinjiang")
mergedhuibian_withoutjilin <- subset(mergedhuibian2, mergedhuibian2$name_en != "Jilin")
mergedhuibian_withoutheilongjiang <- subset(mergedhuibian2, mergedhuibian2$name_en != "Heilongjiang")
mergedhuibian_withoutinnermongolia  <- subset(mergedhuibian2, mergedhuibian2$name_en != "Inner Mongolia")
mergedhuibian_ring <- subset(mergedhuibian2, mergedhuibian2$name_en == "Xinjiang" | mergedhuibian2$name_en == "Jilin" | mergedhuibian2$name_en == "Inner Mongolia" | mergedhuibian2$name_en == "Heilongjiang" | mergedhuibian2$name_en == "Tibet" | mergedhuibian2$name_en == "Qinghai" | mergedhuibian2$name_en == "Gansu" | mergedhuibian2$name_en == "Ningxia" | mergedhuibian2$name_en == "Shanxi" | mergedhuibian2$name_en == "Shaanxi" | mergedhuibian2$name_en == "Hebei" | mergedhuibian2$name_en == "Liaoning")

migration.panel = pdata.frame(mergedhuibian, index = c("name_en", "year"), drop.index = TRUE, row.names = TRUE)
migration.panel2 = pdata.frame(mergedhuibian2, index = c("name_en", "year"), drop.index = TRUE, row.names = TRUE)
xinjiang.panel = pdata.frame(mergedxinjiang2, index = c("pinyin_1952", "year"), drop.index = TRUE, row.names = TRUE)
russia.panel = pdata.frame(russia, index = c("name_en", "year"), drop.index = TRUE, row.names = TRUE)

migration.panel3 = pdata.frame(mergedhuibian3, index = c("name_en", "year"), drop.index = TRUE, row.names = TRUE)
migration.panelXJ = pdata.frame(mergedhuibian_withoutxinjiang, index = c("name_en", "year"), drop.index = TRUE, row.names = TRUE)
migration.panelJL = pdata.frame(mergedhuibian_withoutjilin, index = c("name_en", "year"), drop.index = TRUE, row.names = TRUE)
migration.panelHLJ = pdata.frame(mergedhuibian_withoutheilongjiang, index = c("name_en", "year"), drop.index = TRUE, row.names = TRUE)
migration.panelIM = pdata.frame(mergedhuibian_withoutinnermongolia, index = c("name_en", "year"), drop.index = TRUE, row.names = TRUE)
migration.panelring = pdata.frame(mergedhuibian_ring, index = c("name_en", "year"), drop.index = TRUE, row.names = TRUE)

ethnic.panel53_82withGLF = pdata.frame(ethnicmerged, index = c("Province", "Year"), drop.index = TRUE, row.names = TRUE)

#TABLES

# TABLE 3


model1 <- plm(log_in_migration ~   BorderUSSR*SSSplit, data = migration.panel, model = "fd", effects = "individual" )
model2 <- plm(log_in_migration ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan +  Length_Highways_km, data = migration.panel2, model = "fd", effects = "individual" )
model3 <- plm(lognetmigration1 ~   BorderUSSR*SSSplit , data = migration.panel2, model = "fd", effects = "individual" )
model4 <- plm(lognetmigration1 ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan + Length_Highways_km, data = migration.panel2, model = "fd", effects = "individual" )
model5 <- plm(pop_3 ~   BorderUSSR*SSSplit, data = migration.panel, model = "fd", effects = "individual" )
model6 <- plm(pop_3 ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan +  Length_Highways_km, data = migration.panel2, model = "fd", effects = "individual" )


stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group")),
          coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group")),
          digits = 2)

#linearHypothesis for F statistic p values

linearHypothesis(model1, c("SSSplit =  SSSplit + BorderUSSR:SSSplit"), test =  "F", vcov.=vcovHC(model1, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model2, c("SSSplit =  SSSplit + BorderUSSR:SSSplit"), test =  "F", vcov.=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model3, c("SSSplit =  SSSplit + BorderUSSR:SSSplit"), test =  "F", vcov.=vcovHC(model3, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model4, c("SSSplit =  SSSplit + BorderUSSR:SSSplit"), test =  "F", vcov.=vcovHC(model4, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model5, c("SSSplit =  SSSplit + BorderUSSR:SSSplit"), test =  "F", vcov.=vcovHC(model5, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model6, c("SSSplit = SSSplit + BorderUSSR:SSSplit"), test =  "F", vcov.=vcovHC(model6, method = "arellano", type="HC0",cluster="group"))


#TABLE 4

model1 <- plm(pop_russian ~ SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model2 <- plm(pop_han ~ SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model3 <- plm(pop_hui ~ SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model4 <- plm(pop_kyrgyz ~ SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model5 <- plm(pop_kazakh ~ SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model6 <- plm(pop_uighur ~ SSSplit, data = xinjiang.panel, model = "fd", effects = "individual")

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group")),
          coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group")),
          digits = 2)

#TABLE 5


model1 <- plm(logpopbingtuan ~ pop_russian*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model2 <- plm(logpopbingtuan ~ pop_russian*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model3 <- plm(loghanperc ~ pop_russian*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model4 <- plm(loghanperc ~ pop_russian*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          digits = 2)

linearHypothesis(model1, c("pop_russian = pop_russian + pop_russian:SSSplit"), test =  "F", vcov.=vcovHC(model1, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model2, c("pop_russian = pop_russian + pop_russian:SSSplit"), test =  "F", vcov.=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model3, c("pop_russian = pop_russian + pop_russian:SSSplit"), test =  "F", vcov.=vcovHC(model3, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model4, c("pop_russian = pop_russian + pop_russian:SSSplit"), test =  "F", vcov.=vcovHC(model4, method = "arellano", type="HC0",cluster="group"))

#TABLE 6

model1 <- plm(logpopbingtuan ~ North*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model2 <- plm(logpopbingtuan ~ borderussrnorth*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual")
model3 <- plm(logpopbingtuan ~ USSRNorthDist*SSSplit, data = xinjiang.panel, model = "fd", effects = "individual")
model4 <- plm(loghanperc ~ North*SSSplit  , data = xinjiang.panel, model = "fd", effects = "individual" )
model5 <- plm(loghanperc ~ borderussrnorth*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual")
model6 <- plm(loghanperc ~ USSRNorthDist*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual")

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")), 
          coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group")),
          coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group")),
          digits = 2)

linearHypothesis(model1, c("SSSplit = SSSplit + North:SSSplit"), test =  "F", vcov.=vcovHC(model1, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model2, c("SSSplit = SSSplit + borderussrnorth:SSSplit"), test =  "F", vcov.=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model3, c("SSSplit = SSSplit + USSRNorthDist:SSSplit"), test =  "F", vcov.=vcovHC(model3, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model4, c("SSSplit =SSSplit + North:SSSplit"), test =  "F", vcov.=vcovHC(model4, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model5, c("SSSplit = SSSplit + borderussrnorth:SSSplit"), test =  "F", vcov.=vcovHC(model5, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model6, c("SSSplit = SSSplit + USSRNorthDist:SSSplit"), test =  "F", vcov.=vcovHC(model6, method = "arellano", type="HC0",cluster="group"))


#TABLE 7

model1 <- plm(pop_russian ~ North*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model2 <- plm(pop_russian ~ North*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model3 <- plm(pop_russian ~ borderussrnorth*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual")
model4 <- plm(pop_russian ~ borderussrnorth*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual")
model5 <- plm(pop_russian ~ USSRNorthDist*SSSplit, data = xinjiang.panel, model = "fd", effects = "individual")
model6 <- plm(pop_russian ~ USSRNorthDist*SSSplit, data = xinjiang.panel, model = "pooling", effects = "individual")

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group")),
          coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group")),
          digits = 2)


linearHypothesis(model1, c("SSSplit = SSSplit + North:SSSplit"), test =  "F", vcov.=vcovHC(model1, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model2, c("SSSplit = SSSplit + North + North:SSSplit"), test =  "F", vcov.=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model3, c("SSSplit = SSSplit + borderussrnorth:SSSplit"), test =  "F", vcov.=vcovHC(model3, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model4, c("SSSplit = SSSplit + borderussrnorth + borderussrnorth:SSSplit"), test =  "F", vcov.=vcovHC(model4, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model5, c("SSSplit = SSSplit + USSRNorthDist:SSSplit"), test =  "F", vcov.=vcovHC(model5, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model6, c("SSSplit = SSSplit + USSRNorthDist + USSRNorthDist:SSSplit"), test =  "F", vcov.=vcovHC(model6, method = "arellano", type="HC0",cluster="group"))


#TABLE 8

model1 <- plm(russian ~ border_china*SinoSovietSplit , data = russia.panel, model = "fd", effects = "individual" )
model2 <- plm(russian ~ border_china*SinoSovietSplit , data = russia.panel, model = "pooling", effects = "individual" )
model3 <- plm(chinese ~ border_china*SinoSovietSplit , data = russia.panel, model = "fd", effects = "individual" )
model4 <- plm(chinese ~ border_china*SinoSovietSplit , data = russia.panel, model = "pooling", effects = "individual" )
model5 <- plm(total ~ border_china*SinoSovietSplit , data = russia.panel, model = "fd", effects = "individual" )
model6 <- plm(total ~ border_china*SinoSovietSplit , data = russia.panel, model = "pooling", effects = "individual" )

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group")),
          coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group")),
          digits = 0)

linearHypothesis(model1, c("SinoSovietSplit = SinoSovietSplit + border_china:SinoSovietSplit"), test =  "F", vcov.=vcovHC(model1, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model2, c("SinoSovietSplit = border_china + SinoSovietSplit + border_china:SinoSovietSplit"), test =  "F", vcov.=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model3, c("SinoSovietSplit = SinoSovietSplit + border_china:SinoSovietSplit"), test =  "F", vcov.=vcovHC(model3, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model4, c("SinoSovietSplit = border_china + SinoSovietSplit + border_china:SinoSovietSplit"), test =  "F", vcov.=vcovHC(model4, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model5, c("SinoSovietSplit = SinoSovietSplit + border_china:SinoSovietSplit"), test =  "F", vcov.=vcovHC(model5, method = "arellano", type="HC0",cluster="group"))
linearHypothesis(model6, c("SinoSovietSplit = border_china + SinoSovietSplit + border_china:SinoSovietSplit"), test =  "F", vcov.=vcovHC(model6, method = "arellano", type="HC0",cluster="group"))



#APPENDIX TABLES



#APPENDIX TABLE 9
describe(mergedhuibian$log_in_migration)
describe(mergedhuibian$log_out_migration)
describe(mergedhuibian$lognetmigration1)
describe(mergedhuibian$pop_3)
describe(mergedhuibian$BorderUSSR)
describe(mergedhuibian$SSSplit)
describe(mergedhuibian$GDP_Index_1952)
describe(mergedhuibian$school)
describe(mergedhuibian2$death_rate)
describe(mergedhuibian$GrossIndustrialOutput.Value_100millionyuan)
describe(mergedhuibian2$grainpc)
describe(mergedhuibian2$grainpc_g)
describe(mergedhuibian$Length_Highways_km)




#APPENDIX TABLE 10

describe(xinjiang.panel$logpopbingtuan)
describe(xinjiang.panel$loghanperc)
describe(xinjiang.panel$SSSplit)
describe(xinjiang.panel$North)
describe(xinjiang.panel$borderussrnorth)
describe(xinjiang.panel$USSRNorthDist)
describe(xinjiang.panel$USSRBorderDist)
describe(xinjiang.panel$contested)
describe(xinjiang.panel$Alt_MEAN)
describe(xinjiang.panel$pop_russian)
describe(xinjiang.panel$pop_kyrgyz)
describe(xinjiang.panel$pop_uighur)
describe(xinjiang.panel$pop_kazakh)
describe(xinjiang.panel$pop_han)
describe(xinjiang.panel$pop_hui)


#APPENDIX TABLE 11
describe(russia$border_china)
describe(russia$SinoSovietSplit)
describe(russia$russian)
describe(russia$chinese)
describe(russia$total)


#APPENDIX TABLE 15
#To run gsynth, use R version 3.3.2

out <- gsynth(log_in_migration ~ D , 
              data = migr_trim, 
              index = c("ProvID","Year"), force = "two-way",
              CV = TRUE, r = c(0, 3), se = TRUE, 
              inference = "nonparametric", nboots = 1000,
              parallel = FALSE, seed = 12345)

print(out)

out2 <- gsynth(lognetmigration ~ D , 
              data = migr_trim, 
              index = c("ProvID","Year"), force = "two-way",
              CV = TRUE, r = c(0, 3), se = TRUE, 
              inference = "nonparametric", nboots = 1000,
              parallel = FALSE, seed = 12345)

print(out2)

out3 <- gsynth(pop_3 ~ D , 
               data = migr_trim, 
               index = c("ProvID","Year"), force = "two-way",
               CV = TRUE, r = c(0, 3), se = TRUE, 
               inference = "nonparametric", nboots = 1000,
               parallel = FALSE, seed = 12345)

print(out3)

#APPENDIX TABLE 16

model2 <- plm(log_in_migration ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan + Length_Highways_km, data = migration.panel3, model = "fd", effects = "individual" ) 
model3 <- plm(log_in_migration ~   BorderUSSR*SSSplit +  GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan + Length_Highways_km, data = migration.panelXJ, model = "fd", effects = "individual" ) 
model4 <- plm(log_in_migration ~   BorderUSSR*SSSplit +  GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan + Length_Highways_km, data = migration.panelJL, model = "fd", effects = "individual" ) 
model5 <- plm(log_in_migration ~   BorderUSSR*SSSplit +  GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan +  Length_Highways_km, data = migration.panelHLJ, model = "fd", effects = "individual" ) 
model6 <- plm(log_in_migration ~   BorderUSSR*SSSplit +  GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan + Length_Highways_km, data = migration.panelIM, model = "fd", effects = "individual" ) 

stargazer( coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
           ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group"))
           ,coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group"))
           ,coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group"))
           ,coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group")), digits = 2)


#APPENDIX TABLE 17

model1 <- plm(log_in_migration ~   BorderUSSR*SSSplit, data = migration.panel, model = "pooling", effects = "individual" )
model2 <- plm(log_in_migration ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan +  Length_Highways_km, data = migration.panel2, model = "pooling", effects = "individual" )
model3 <- plm(lognetmigration1 ~   BorderUSSR*SSSplit , data = migration.panel2, model = "pooling", effects = "individual" )
model4 <- plm(lognetmigration1 ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan + Length_Highways_km, data = migration.panel2, model = "pooling", effects = "individual" )
model5 <- plm(pop_3 ~   BorderUSSR*SSSplit, data = migration.panel, model = "pooling", effects = "individual" )
model6 <- plm(pop_3 ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan +  Length_Highways_km, data = migration.panel2, model = "pooling", effects = "individual" )

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group")),
          coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group")),
          digits = 2)


#APPENDIX TABLE 18


model1 <- plm(log_in_migration ~   BorderUSSR*SSSplit, data = migration.panelring, model = "fd", effects = "individual" )
model2 <- plm(log_in_migration ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan +  Length_Highways_km, data = migration.panelring, model = "fd", effects = "individual" )
model3 <- plm(lognetmigration1 ~   BorderUSSR*SSSplit , data = migration.panelring, model = "fd", effects = "individual" )
model4 <- plm(lognetmigration1 ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan + Length_Highways_km, data = migration.panelring, model = "fd", effects = "individual" )
model5 <- plm(pop_3 ~   BorderUSSR*SSSplit, data = migration.panelring, model = "fd", effects = "individual" )
model6 <- plm(pop_3 ~   BorderUSSR*SSSplit + GDP_Index_1952 + school + death_rate + grainpc + grainpc_g + GrossIndustrialOutput.Value_100millionyuan +  Length_Highways_km, data = migration.panelring, model = "fd", effects = "individual" )

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group")),
          coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group")),
          digits = 2)


#APPENDIX TABLE 19

model1 <- plm(logminorityperc ~ BorderUSSR*year53_82  ,model = "pooling", effects = "twoways", data =  ethnic.panel53_82withGLF)
model2 <- plm(logminorityperc ~ BorderUSSR*year53_82  ,model = "fd", effects = "twoways", data =  ethnic.panel53_82withGLF)
model3 <- plm(logminorityperc ~ BorderUSSR*year53_82 +  GDP_Index_1952 +  death_rate + grainpc + grainpc_g +  GrossIndustrialOutput.Value_100millionyuan +  Length_Highways_km ,model = "pooling", effects = "twoways", data = ethnic.panel53_82withGLF)
model4 <- plm(logminorityperc ~ BorderUSSR*year53_82 + GDP_Index_1952  + death_rate + grainpc + grainpc_g +  GrossIndustrialOutput.Value_100millionyuan + Length_Highways_km,model = "fd", effects = "twoways", data = ethnic.panel53_82withGLF)

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group"))
          , coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          , coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group"))
          , coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          digits = 2)

#APPENDIX TABLE 20

model1 <- plm(logpopbingtuan ~ North*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model2 <- plm(logpopbingtuan ~ borderussrnorth*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual")
model3 <- plm(logpopbingtuan ~ USSRNorthDist*SSSplit, data = xinjiang.panel, model = "pooling", effects = "individual")
model4 <- plm(loghanperc ~ North*SSSplit  , data = xinjiang.panel, model = "pooling", effects = "individual" )
model5 <- plm(loghanperc ~ borderussrnorth*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual")
model6 <- plm(loghanperc ~ USSRNorthDist*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual")


stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group")),
          coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group")),
          digits = 2)

#APPENDIX TABLE 21


model1 <- plm(logpopbingtuan ~ contested*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model2 <- plm(logpopbingtuan ~ contested*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model3 <- plm(loghanperc ~ contested*SSSplit , data = xinjiang.panel, model = "fd", effects = "individual" )
model4 <- plm(loghanperc ~ contested*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")),
          digits = 2)

#APPENDIX TABLE 22

model1 <- plm(russian ~ chinese*SinoSovietSplit , data = russia.panel, model = "fd", effects = "individual" )
model2 <- plm(russian ~ chinese*SinoSovietSplit , data = russia.panel, model = "pooling", effects = "individual" )
model3 <- plm(total ~ chinese*SinoSovietSplit , data = russia.panel, model = "fd", effects = "individual" )
model4 <- plm(total ~ chinese*SinoSovietSplit , data = russia.panel, model = "pooling", effects = "individual" )

stargazer(coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group")) 
          ,coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
          ,coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group")),
          coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group")))


#FIGURES
#To run gsynth, use R version 3.3.2
# Figure 4
out <- gsynth(log_in_migration ~ D , 
              data = migr_trim, 
              index = c("ProvID","Year"), force = "two-way",
              CV = TRUE, r = c(0, 3), se = TRUE, 
              inference = "nonparametric", nboots = 1000,
              parallel = FALSE, seed = 12345)

plot(out, type = "counterfactual", raw = "none",ylab = "Log Inward Migration", xlab = "Year",
     ylim = c(12.9,14.3), legendOff = T,   main = " Diff-in-Diff migration")

# Figure 5
out <- gsynth(pop_3 ~ D , 
              data = migr_trim, 
              index = c("ProvID","Year"), force = "two-way",
              CV = TRUE, r = c(0, 3), se = TRUE, 
              inference = "nonparametric", nboots = 1000,
              parallel = FALSE, seed = 12345)

plot(out, type = "counterfactual", raw = "none",ylab = "Total Population (10,000s)", xlab = "Year",
     ylim = c(750,2000), legendOff = T,   main = " Diff-in-Diff population")


# FIGURE 6

model1 <- plm(logpopbingtuan ~ pop_russian*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model2 <- plm(logpopbingtuan ~ pop_hui*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model3 <- plm(logpopbingtuan ~ pop_kyrgyz*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model4 <- plm(logpopbingtuan ~ pop_kazakh*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model5 <- plm(logpopbingtuan ~ pop_uighur*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model6 <- plm(logpopbingtuan ~ pop_han*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )

M1 <- coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group"))
M2 <- coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
M3 <- coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group"))
M4 <- coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group"))
M5 <- coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group"))
M6 <- coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group"))

m1_df  <- tidy(M1)
m1_df <- mutate(m1_df, model = "Russian")
m2_df  <- tidy(M2)
m2_df <- mutate(m2_df, model = "Hui")
m3_df  <- tidy(M3)
m3_df <- mutate(m3_df, model = "Kyrgyz")
m4_df  <- tidy(M4)
m4_df <- mutate(m4_df, model = "Kazakh")
m5_df  <- tidy(M5)
m5_df <- mutate(m5_df, model = "Uighur")
m6_df  <- tidy(M6)
m6_df <- mutate(m6_df, model = "Han")

one_models <- rbind(m1_df, m2_df, m3_df, m4_df, m5_df, m6_df)
one_models
one_modelsnointercept <- one_models[c(4,8,12,16,20,24),]
one_modelsnointercept
class(one_modelsnointercept)
dim(one_modelsnointercept)
rownames(one_modelsnointercept) <- c("Pop. Russian x Split","Pop. Hui x Split","Pop. Kyrgyz x Split",
                                     "Pop. Kazakh x Split","Pop. Uighur x Split","Pop. Han x Split")
one_modelsnointercept$term <- rownames(one_modelsnointercept)

png(file = "plot_result_xinjiang_russian_bingtuan_new.png",res = 300,
    width = 10,height = 5,units="in")

one_modelsnointercept$term2 <- c(6,5,4,3,2,1)

ggplot() +
  geom_point(data=one_modelsnointercept,aes(x=estimate, y=term2), size=3,
             color=ifelse(one_modelsnointercept$term2==6, "firebrick", "black")) + 
  geom_errorbarh(data=one_modelsnointercept, aes(xmin=estimate - 1.96 * std.error, 
                                                 xmax= estimate + 1.96 * std.error, y=term2),
                 height=.15, color=ifelse(one_modelsnointercept$term2==6, "firebrick", "black")) + 
  geom_vline(xintercept = 0, linetype=2) +
  scale_x_continuous(limits=c(-0.035, 0.05), breaks=seq(-0.03, 0.05, 0.01)) + 
  scale_y_continuous(breaks=c(6,5,4,3,2,1), 
                     labels=c("Pop. Russian x Split",
                              "Pop. Hui x Split","Pop. Kyrgyz x Split",
                              "Pop. Kazakh x Split","Pop. Uighur x Split","Pop. Han x Split")) +
  xlab("Interaction Coefficient Estimate") + 
  ylab("") + 
  theme_bw() +   
  theme(plot.title = element_text(hjust = 0.5, size=15, face="bold"),
        axis.text.x=element_text(size=13),
        axis.text.y = element_text(face="bold", size=13),        
        axis.title=element_text(size=14,face="bold"),
        legend.title=element_blank(),
        legend.position="none") 

dev.off()

# FIGURE 7

model1 <- plm(loghanperc ~ pop_russian*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model2 <- plm(loghanperc ~ pop_hui*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model3 <- plm(loghanperc ~ pop_kyrgyz*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model4 <- plm(loghanperc ~ pop_kazakh*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model5 <- plm(loghanperc ~ pop_uighur*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )
model6 <- plm(loghanperc ~ pop_han*SSSplit , data = xinjiang.panel, model = "pooling", effects = "individual" )


M1 <- coeftest(model1, vcov=vcovHC(model1, method = "arellano", type="HC0",cluster="group"))
M2 <- coeftest(model2, vcov=vcovHC(model2, method = "arellano", type="HC0",cluster="group"))
M3 <- coeftest(model3, vcov=vcovHC(model3, method = "arellano", type="HC0",cluster="group"))
M4 <- coeftest(model4, vcov=vcovHC(model4, method = "arellano", type="HC0",cluster="group"))
M5 <- coeftest(model5, vcov=vcovHC(model5, method = "arellano", type="HC0",cluster="group"))
M6 <- coeftest(model6, vcov=vcovHC(model6, method = "arellano", type="HC0",cluster="group"))

m1_df  <- tidy(M1)
m1_df <- mutate(m1_df, model = "Russian")
m2_df  <- tidy(M2)
m2_df <- mutate(m2_df, model = "Hui")
m3_df  <- tidy(M3)
m3_df <- mutate(m3_df, model = "Kyrgyz")
m4_df  <- tidy(M4)
m4_df <- mutate(m4_df, model = "Kazakh")
m5_df  <- tidy(M5)
m5_df <- mutate(m5_df, model = "Uighur")
m6_df  <- tidy(M6)
m6_df <- mutate(m6_df, model = "Han")


one_models <- rbind(m1_df, m2_df, m3_df, m4_df, m5_df, m6_df)
one_models
one_modelsnointercept <- one_models[c(4,8,12,16,20,24),]
one_modelsnointercept
class(one_modelsnointercept)
dim(one_modelsnointercept)
rownames(one_modelsnointercept) <- c("Pop. Russian: Split","Pop. Hui: Split","Pop. Kyrgyz: Split",
                                     "Pop. Kazakh: Split","Pop. Uighur: Split","Pop. Han: Split")
one_modelsnointercept$term <- rownames(one_modelsnointercept)


png(file = "plot_result_xinjiang_russian_han_new.png",res = 300,
    width = 10,height = 5,units="in")

one_modelsnointercept$term2 <- c(6,5,4,3,2,1)

ggplot() +
  geom_point(data=one_modelsnointercept,aes(x=estimate, y=term2), size=3,
             color=ifelse(one_modelsnointercept$term2==6, "firebrick", "black")) + 
  geom_errorbarh(data=one_modelsnointercept, aes(xmin=estimate - 1.96 * std.error, 
                                                 xmax= estimate + 1.96 * std.error, y=term2),
                 height=.15, color=ifelse(one_modelsnointercept$term2==6, "firebrick", "black")) + 
  geom_vline(xintercept = 0, linetype=2) +
  scale_x_continuous(limits=c(-0.001, 0.0015), breaks=seq(-0.001, 0.0015, 0.0005)) + 
  scale_y_continuous(breaks=c(6,5,4,3,2,1), 
                     labels=c("Pop. Russian x Split",
                              "Pop. Hui x Split","Pop. Kyrgyz x Split",
                              "Pop. Kazakh x Split","Pop. Uighur x Split","Pop. Han x Split")) +
  xlab("Interaction Coefficient Estimate") + 
  ylab("") + 
  theme_bw() +   
  theme(plot.title = element_text(hjust = 0.5, size=15, face="bold"),
        axis.text.x=element_text(size=13),
        axis.text.y = element_text(face="bold", size=13),        
        axis.title=element_text(size=14,face="bold"),
        legend.title=element_blank(),
        legend.position="none") 

dev.off()
